notebook.community

Edit and run



In [40]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Chisquare distribution



In [41]:

    
#Generate a random variable with 100 datapoints for a Chisquare distribution
chisquare = np.random.chisquare(2, 100)



In [42]:

    
#plot Chisquare distribution using a histogram
plt.hist(chisquare)
plt.show()



In [43]:

    
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(chisquare)
plt.axvline(chisquare.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(chisquare.mean() + chisquare.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(chisquare.mean()-chisquare.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [44]:

    
#Evaluate descriptive statistics against data
c1 = np.mean(chisquare)
c2 = np.median(chisquare)
c3= np.var(chisquare)
c4 = np.std(chisquare,ddof=1)
c5 = np.std(chisquare ,ddof=1) / np.sqrt(len(chisquare))

print('chisquare')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['chisquare'] = np.random.choice(chisquare, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)









    



chisquare
2.1206786374
1.48676337177
4.49504840255
2.13083385834
0.213083385834






    












    



chisquare    2.207264
dtype: float64
chisquare    2.237704
dtype: float64

Dirichlet distribution



In [45]:

    
#Generate a random variable with 100 datapoints for a Chisquare distribution
dirichlet = np.random.dirichlet((1,2), 100)



In [46]:

    
#plot Dirichlet distribution using a histogram
plt.hist(dirichlet)
plt.show()



In [47]:

    
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(dirichlet)
plt.axvline(dirichlet.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(dirichlet.mean() + dirichlet.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(dirichlet.mean()-dirichlet.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [48]:

    
#Evaluate descriptive statistics against data
d1 = np.mean(dirichlet)
d2 = np.median(dirichlet)
d3= np.var(dirichlet)
d4 = np.std(dirichlet,ddof=1)
d5 = np.std(dirichlet ,ddof=1) / np.sqrt(len(dirichlet))

print('dirichlet')
print(d1)
print(d2)
print(d3)
print(d4)
print(d5)









    



dirichlet
0.5
0.5
0.0938408165359
0.307103204848
0.0307103204848

Standard t distribution



In [49]:

    
#Generate a random variable with 100 datapoints for a standard t distribution
standard_t = np.random.standard_t(50, 100)



In [50]:

    
#plot Standard t distribution using a histogram
plt.hist(standard_t)
plt.show()



In [51]:

    
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(standard_t)
plt.axvline(standard_t.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(standard_t.mean() + standard_t.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(standard_t.mean()-standard_t.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [52]:

    
#Evaluate descriptive statistics against data
c1 = np.mean(standard_t)
c2 = np.median(standard_t)
c3= np.var(standard_t)
c4 = np.std(standard_t,ddof=1)
c5 = np.std(standard_t ,ddof=1) / np.sqrt(len(standard_t))

print('standard_t')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['standard_t'] = np.random.choice(standard_t, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)









    



standard_t
-0.0152853606037
0.0733338216136
1.087539194
1.04810516571
0.104810516571






    












    



standard_t   -0.038865
dtype: float64
standard_t    0.971557
dtype: float64

Logistic distribution



In [53]:

    
#Generate a random variable with 100 datapoints for logistic distribution
logistic = np.random.logistic(9,2, 100)



In [54]:

    
#plot logistic distribution using a histogram
plt.hist(logistic)
plt.show()



In [55]:

    
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(logistic)
plt.axvline(logistic.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(logistic.mean() + logistic.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(logistic.mean()-logistic.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [56]:

    
#Evaluate descriptive statistics against data
c1 = np.mean(logistic)
c2 = np.median(logistic)
c3= np.var(logistic)
c4 = np.std(logistic,ddof=1)
c5 = np.std(logistic ,ddof=1) / np.sqrt(len(logistic))

print('standard_t')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['logistic'] = np.random.choice(logistic, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)









    



standard_t
9.29508652954
9.53696067606
15.2993318944
3.93114113209
0.393114113209






    












    



logistic    9.273761
dtype: float64
logistic    3.940146
dtype: float64

Rayleigh distribution



In [57]:

    
#Generate a random variable with 100 datapoints for rayleigh distribution
rayleigh = np.random.rayleigh(1, 100)



In [58]:

    
#plot Rayleigh distribution using a histogram
plt.hist(rayleigh)
plt.show()



In [59]:

    
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(rayleigh)
plt.axvline(rayleigh.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rayleigh.mean() + rayleigh.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rayleigh.mean()-rayleigh.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [60]:

    
#Evaluate descriptive statistics against data
c1 = np.mean(rayleigh)
c2 = np.median(rayleigh)
c3= np.var(rayleigh)
c4 = np.std(rayleigh,ddof=1)
c5 = np.std(rayleigh ,ddof=1) / np.sqrt(len(rayleigh))

print('rayleigh')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['rayleigh'] = np.random.choice(rayleigh, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)









    



rayleigh
1.16842910258
0.959254226449
0.511401535514
0.718726100535
0.0718726100535






    












    



rayleigh    1.281252
dtype: float64
rayleigh    0.78202
dtype: float64

Geometric distribution



In [61]:

    
#Generate a random variable with 100 datapoints for Geometric distribution
geometric = np.random.geometric(0.25, 100)



In [62]:

    
#plot Geometric distribution using a histogram
plt.hist(geometric)
plt.show()



In [63]:

    
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(geometric)
plt.axvline(geometric.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(geometric.mean() + geometric.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(geometric.mean()-geometric.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [64]:

    
#Evaluate descriptive statistics against data
c1 = np.mean(geometric)
c2 = np.median(geometric)
c3= np.var(geometric)
c4 = np.std(geometric,ddof=1)
c5 = np.std(geometric ,ddof=1) / np.sqrt(len(geometric))

print('geometric')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['geometric'] = np.random.choice(geometric, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)









    



geometric
3.92
3.0
9.6136
3.11620074301
0.311620074301






    












    



geometric    3.99
dtype: float64
geometric    2.931869
dtype: float64



In [65]:

    
#Generation of 2 normally-distributed variables, one with mean 5 and standard deviation of 0.5 and the other with mean of 10 and standar deviation of 1
rand1 = np.random.normal(5, 0.5, 1000)
rand2 = np.random.normal(10,1, 1000)



In [66]:

    
#Generate a 3rd variable adding the 2 normallt distributed variables
rand3= rand1+rand2



In [67]:

    
#Plot histogram of the 3rd variable
plt.hist(rand3, bins=20, color='c')
plt.show()



In [68]:

    
#Compute and plot the mean and standard deviation for variable rand3
plt.hist(rand3, bins=20, color='c')
plt.axvline(rand3.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand3.mean() + rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand3.mean()-rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [69]:

    
#Calculate descriptive statistics for variable rand3
a = np.mean(rand3)
b = np.median(rand3)
c= np.var(rand3)
d = np.std(rand3,ddof=1)
e = np.std(rand3 ,ddof=1) / np.sqrt(len(rand3))
print(a)
print(b)
print(c)
print(d)
print(e)









    



15.0740718589
15.0825946634
1.20027793015
1.09612016201
0.0346623630118



In [70]:

    
#Plot a sample for rand3
sample=pd.DataFrame()
sample['variable3'] = np.random.choice(rand3, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
f = sample.mean()
g = sample.std(ddof=1)
print(f)
print(g)









    












    



variable3    15.144762
dtype: float64
variable3    1.114334
dtype: float64



In [71]:

    
#Plot histogram of the 3 variabless
plt.hist(rand1, bins=20, color = 'b')
plt.hist(rand2, bins=20, color='r')
plt.hist(rand3, bins=20, color='c')
plt.show()



In [72]:

    
#Compute and plot the mean and standard deviation for each of the variables
plt.hist(rand1, bins=20, color = 'b')
plt.hist(rand2, bins=20, color='r')
plt.hist(rand3, bins=20, color='c')

plt.axvline(rand1.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand1.mean() + rand1.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand1.mean()-rand1.std(), color='b', linestyle='dashed', linewidth=2)

plt.axvline(rand2.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand2.mean() + rand2.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand2.mean()-rand2.std(), color='b', linestyle='dashed', linewidth=2)

plt.axvline(rand3.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand3.mean() + rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand3.mean()-rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [73]:

    
#Evaluate descriptive statistics against data

a2 = np.mean(rand2)
b2 = np.median(rand2)
c2= np.var(rand2)
d2 = np.std(rand2,ddof=1)
e2 = np.std(rand2 ,ddof=1) / np.sqrt(len(rand2))

print('rand2')
print(a2)
print(b2)
print(c2)
print(d2)
print(e2)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['variable2'] = np.random.choice(rand2, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
f2 = sample.mean()
g2 = sample.std(ddof=1)
print(f2)
print(g2)









    



rand2
10.0572601977
10.0314504823
0.973282720879
0.98704456731
0.031213089848






    












    



variable2    10.112494
dtype: float64
variable2    0.99336
dtype: float64



In [74]:

    
#Evaluate descriptive statistics against data
a1 = np.mean(rand1)
b1 = np.median(rand1)
c1= np.var(rand1)
d1 = np.std(rand1,ddof=1)
e1 = np.std(rand1 ,ddof=1) / np.sqrt(len(rand1))

print('rand1')
print(a1)
print(b1)
print(c1)
print(d1)
print(e1)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['variable1'] = np.random.choice(rand1, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
f1 = sample.mean()
g1 = sample.std(ddof=1)
print(f1)
print(g1)









    



rand1
5.01681166115
5.01573361938
0.253057309079
0.503299731471
0.015915734972






    












    



variable1    4.888688
dtype: float64
variable1    0.491917
dtype: float64



In [ ]: